# Import the data set identified by "jake_gyllenhaal".
# NOTE(review): import_data() is not defined in this chunk; presumably it is a
# project helper that stores the data later loaded by read_imported_data() --
# confirm. The rendered output right after this call shows an
# "NAs introduced by coercion" warning.
import_data("jake_gyllenhaal")
NAs introduced by coercion
# Load the imported films table and print a compact overview of its columns.
filmes <- read_imported_data()
glimpse(filmes)
Observations: 20
Variables: 5
$ avaliacao <int> 92, 68, 73, 52, 73, 59, 82, 85, 92, 49, 35, 64, 47, 90, 87, 61, 62, 44, ...
$ filme <chr> "Stronger", "Life", "Nocturnal Animals", "Demolition", "Everest", "South...
$ papel <chr> "Jeff Bauman", "David Jordan", "Tony HastingsEdward Sheffield", "Davis M...
$ bilheteria <dbl> 4.2, 30.2, 10.7, 1.7, 46.6, 42.4, 61.0, 39.1, 54.7, 33.3, 90.8, 28.6, 9....
$ ano <int> 2017, 2017, 2016, 2016, 2015, 2015, 2013, 2012, 2011, 2010, 2010, 2009, ...
Data Overview
Bilheteria
# Box office (bilheteria) of each film against its release year (ano).
ggplot(data = filmes, mapping = aes(x = ano, y = bilheteria)) +
  geom_point(color = paleta[1], size = 4)

# Distribution of box office: histogram with bins of width 10 aligned at 0,
# plus a rug with one tick per film; x-axis breaks every 10 from 0 to 210.
ggplot(data = filmes, mapping = aes(x = bilheteria)) +
  geom_histogram(
    fill = "grey",
    color = "black",
    binwidth = 10,
    boundary = 0
  ) +
  geom_rug(size = 0.5) +
  scale_x_continuous(breaks = seq(from = 0, to = 210, by = 10))

# Q-Q plot of box office using stat_qq() with its default settings.
# NOTE(review): the data is grouped by film before plotting, as in the
# original; whether that grouping influences the plot is not evident here.
ggplot(
  data = group_by(filmes, filme),
  mapping = aes(sample = bilheteria)
) +
  stat_qq()

# Jittered strip of box office values, converted to an interactive plotly
# widget. The film title is mapped to `label` in the aesthetics.
p <- ggplot(
  data = filmes,
  mapping = aes(x = "", y = bilheteria, label = filme)
) +
  geom_jitter(size = 3, alpha = 0.3, width = 0.05) +
  labs(x = "")

ggplotly(p)
Avaliação
# Rating (avaliacao) of each film against its release year, with the y axis
# fixed to the 0-100 range.
ggplot(data = filmes, mapping = aes(x = ano, y = avaliacao)) +
  geom_point(color = paleta[1], size = 4) +
  scale_y_continuous(limits = c(0, 100))

# Distribution of ratings: histogram with bins of width 10 aligned at 0,
# plus a rug with one tick per film.
ggplot(data = filmes, mapping = aes(x = avaliacao)) +
  geom_histogram(
    fill = paleta[3],
    color = "black",
    binwidth = 10,
    boundary = 0
  ) +
  geom_rug(size = 0.5)

# Q-Q plot of ratings using stat_qq() with its default settings.
# NOTE(review): the data is grouped by film before plotting, as in the
# original; whether that grouping influences the plot is not evident here.
ggplot(
  data = group_by(filmes, filme),
  mapping = aes(sample = avaliacao)
) +
  stat_qq()

# Jittered strip of ratings, converted to an interactive plotly widget.
# The film title is mapped to `label` in the aesthetics.
p <- ggplot(
  data = filmes,
  mapping = aes(x = "", y = avaliacao, label = filme)
) +
  geom_jitter(size = 3, alpha = 0.3, width = 0.05) +
  labs(x = "")

ggplotly(p)
Agrupamento hierárquico
# Hierarchical clustering of the films using only their rating (avaliacao).
# Film titles are moved to the row names so that they travel through dist()
# and hclust() and show up as the leaf labels of the dendrogram.
agrupamento_h <- filmes %>%
  as.data.frame() %>%
  column_to_rownames("filme") %>%
  select(avaliacao) %>%
  dist(method = "euclidean") %>%
  hclust(method = "ward.D")

# Horizontal dendrogram; the default dendrogram theme is turned off.
ggdendrogram(agrupamento_h, rotate = TRUE, size = 2, theme_dendro = FALSE) +
  labs(y = "Dissimilaridade", x = "", title = "Dendrograma")

# Cut a hierarchical clustering into groups and return the assignments in
# long format.
#
# agrupamento: a tree accepted by cutree() (e.g. the result of hclust()).
# num_grupos:  one or more numbers of groups (the `k` argument of cutree()).
#
# Returns a data frame with one row per (item, k) pair and three character
# columns: `label` (the item's label), `k` (the number of groups of that
# solution) and `grupo` (the group assigned to the item in that solution).
# Rows are stacked solution by solution, in the order given in `num_grupos`.
get_grupos <- function(agrupamento, num_grupos) {
  cortes <- cutree(agrupamento, k = num_grupos)

  # For a single k, cutree() returns a plain vector instead of a matrix.
  # Promote it to a one-column matrix named after that k so the `k` column
  # always carries the number of groups.
  if (!is.matrix(cortes)) {
    cortes <- matrix(
      cortes,
      ncol = 1,
      dimnames = list(names(cortes), as.character(num_grupos))
    )
  }

  # Trees without labels fall back to the row index as label.
  rotulos <- rownames(cortes)
  if (is.null(rotulos)) {
    rotulos <- as.character(seq_len(nrow(cortes)))
  }

  # as.character() walks the matrix column by column, which matches the
  # rep() patterns used for `label` and `k`.
  data.frame(
    label = rep(rotulos, times = ncol(cortes)),
    k = rep(colnames(cortes), each = nrow(cortes)),
    grupo = as.character(cortes),
    stringsAsFactors = FALSE
  )
}
# Group assignments for every solution from 1 to 6 groups, joined with the
# film data by title.
atribuicoes <- get_grupos(agrupamento_h, num_grupos = 1:6) %>%
  left_join(filmes, by = c("label" = "filme"))

# One panel per number of groups: ratings jittered horizontally only and
# coloured by the assigned group.
ggplot(
  data = atribuicoes,
  mapping = aes(x = "Filmes", y = avaliacao, colour = grupo)
) +
  geom_jitter(size = 1.6, alpha = 0.6, width = 0.02, height = 0) +
  facet_wrap(~ paste(k, " grupos")) +
  scale_color_brewer(palette = "Dark2")

# Number of groups selected for the detailed view.
k_escolhido <- 3

# Films of the chosen solution, ordered by rating and coloured by group;
# the axes are flipped so the film titles are listed vertically.
ggplot(
  data = filter(atribuicoes, k == k_escolhido),
  mapping = aes(
    x = reorder(label, avaliacao),
    y = avaliacao,
    colour = grupo
  )
) +
  geom_jitter(size = 3, alpha = 0.6, width = 0.02, height = 0) +
  facet_wrap(~ paste(k, " grupos")) +
  scale_color_brewer(palette = "Dark2") +
  labs(x = "", y = "Avaliação RT") +
  coord_flip()

LS0tCnRpdGxlOiAiVGlwb3MgZGUgZmlsbWUgZGUgSmFrZSBHeWxsZW5oYWFsIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwotLS0KCmBgYHtyIGVjaG89RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGhlcmUpCmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShwbG90bHkpCmxpYnJhcnkoZ2dkZW5kcm8pCgpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9saWIuUiIpKQpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9wbG90YV9zb2x1Y29lc19oY2x1c3QuUiIpKQoKdGhlbWVfc2V0KHRoZW1lX3JlcG9ydCgpKQoKa25pdHI6Om9wdHNfY2h1bmskc2V0KHRpZHkgPSBGQUxTRSwKICAgICAgICAgICAgICAgICAgICAgIGZpZy53aWR0aCA9IDYsCiAgICAgICAgICAgICAgICAgICAgICBmaWcuaGVpZ2h0ID0gNSwKICAgICAgICAgICAgICAgICAgICAgIGVjaG8gPSBUUlVFKQoKcGFsZXRhID0gYygiIzQwNEU0RCIsCiAgICAgICAgICAgIiM5MkRDRTUiLAogICAgICAgICAgICIjOTM4QkExIiwKICAgICAgICAgICAiIzJEMzE0MiIsCiAgICAgICAgICAgIiNGNDc0M0IiKQpgYGAKCmBgYHtyIHJlYWR9CmltcG9ydF9kYXRhKCJqYWtlX2d5bGxlbmhhYWwiKSAKZmlsbWVzID0gcmVhZF9pbXBvcnRlZF9kYXRhKCkKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdsaW1wc2UoKQpgYGAKCgojIyBEYXRhIE92ZXJ2aWV3CgojIyMgQmlsaGV0ZXJpYQoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSBhbm8sIHkgPSBiaWxoZXRlcmlhKSkgKyAKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDQsIGNvbG9yID0gcGFsZXRhWzFdKSAKYGBgCgoKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYmlsaGV0ZXJpYSkpICsgCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwLCBib3VuZGFyeSA9IDAsIAogICAgICAgICAgICAgICAgICAgZmlsbCA9ICJncmV5IiwgY29sb3IgPSAiYmxhY2siKSArIAogICAgZ2VvbV9ydWcoc2l6ZSA9IC41KSArCiAgICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzPXNlcSgwLDIxMCwxMCkpCmBgYAoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBncm91cF9ieShmaWxtZSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHNhbXBsZT1iaWxoZXRlcmlhKSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KcCA9IGZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSAiIiwgeSA9IGJpbGhldGVyaWEsIGxhYmVsID0gZmlsbWUpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDUsIGFscGhhID0gLjMsIHNpemUgPSAzKSArIAogICAgbGFicyh4ID0gIiIpCgpnZ3Bsb3RseShwKQpgYGAKCiMjIyBBdmFsaWHDp8OjbwoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBn
Z3Bsb3QoYWVzKHggPSBhbm8sIHkgPSBhdmFsaWFjYW8pKSArIAogICAgZ2VvbV9wb2ludChzaXplID0gNCwgY29sb3IgPSBwYWxldGFbMV0pICArCiAgICBzY2FsZV95X2NvbnRpbnVvdXMobGltaXRzID0gYygwLCAxMDApKQpgYGAKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYXZhbGlhY2FvKSkgKyAKICAgIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMTAsIGJvdW5kYXJ5ID0gMCwgCiAgICAgICAgICAgICAgICAgICBmaWxsID0gcGFsZXRhWzNdLCBjb2xvciA9ICJibGFjayIpICsgCiAgICBnZW9tX3J1ZyhzaXplID0gLjUpIApgYGAKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ3JvdXBfYnkoZmlsbWUpICU+JQogICAgZ2dwbG90KGFlcyhzYW1wbGU9YXZhbGlhY2FvKSkgKyAKICAgIHN0YXRfcXEoKSAKYGBgCgpgYGB7cn0KcCA9IGZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSAiIiwgeSA9IGF2YWxpYWNhbywgbGFiZWwgPSBmaWxtZSkpICsgCiAgICBnZW9tX2ppdHRlcih3aWR0aCA9IC4wNSwgYWxwaGEgPSAuMywgc2l6ZSA9IDMpICsgCiAgICBsYWJzKHggPSAiIikKCmdncGxvdGx5KHApCmBgYAoKIyMgQWdydXBhbWVudG8gaGllcsOhcnF1aWNvCgpgYGB7cn0KYWdydXBhbWVudG9faCA9IGZpbG1lcyAlPiUgCiAgICBtdXRhdGUobm9tZSA9IHBhc3RlMChmaWxtZSwgIiAoYXY9IiwgYXZhbGlhY2FvLCAiKSIpKSAlPiUgCiAgICBhcy5kYXRhLmZyYW1lKCkgJT4lIAogICAgY29sdW1uX3RvX3Jvd25hbWVzKCJmaWxtZSIpICU+JSAKICAgIHNlbGVjdChhdmFsaWFjYW8pICU+JQogICAgZGlzdChtZXRob2QgPSAiZXVjbGlkaWFuIikgJT4lIAogICAgaGNsdXN0KG1ldGhvZCA9ICJ3YXJkLkQiKQoKZ2dkZW5kcm9ncmFtKGFncnVwYW1lbnRvX2gsIHJvdGF0ZSA9IFQsIHNpemUgPSAyLCB0aGVtZV9kZW5kcm8gPSBGKSArIAogICAgbGFicyh5ID0gIkRpc3NpbWlsYXJpZGFkZSIsIHggPSAiIiwgdGl0bGUgPSAiRGVuZHJvZ3JhbWEiKQpgYGAKCmBgYHtyfQpnZXRfZ3J1cG9zIDwtIGZ1bmN0aW9uKGFncnVwYW1lbnRvLCBudW1fZ3J1cG9zKXsKICAgIGFncnVwYW1lbnRvICU+JSAKICAgICAgICBjdXRyZWUobnVtX2dydXBvcykgJT4lIAogICAgICAgIGFzLmRhdGEuZnJhbWUoKSAlPiUgCiAgICAgICAgbXV0YXRlKGxhYmVsID0gcm93bmFtZXMoLikpICU+JSAKICAgICAgICBnYXRoZXIoa2V5ID0gICJrIiwgdmFsdWUgPSAiZ3J1cG8iLCAtbGFiZWwpICU+JSAKICAgICAgICBtdXRhdGUoZ3J1cG8gPSBhcy5jaGFyYWN0ZXIoZ3J1cG8pKQp9CgphdHJpYnVpY29lcyA9IGdldF9ncnVwb3MoYWdydXBhbWVudG9faCwgbnVtX2dydXBvcyA9IDE6NikKCmF0cmlidWljb2VzID0gYXRyaWJ1aWNvZXMgJT4lIAogICAgbGVmdF9qb2luKGZpbG1lcywgYnkgPSBjKCJsYWJlbCIgPSAiZmlsbWUiKSkKCmF0cmlidWljb2VzICU+JSAKICAgIGdncGxvdChhZXMoeCA9ICJGaWxtZXMiLCB5ID0gYXZhbGlhY2FvLCBjb2xv
dXIgPSBncnVwbykpICsgCiAgICBnZW9tX2ppdHRlcih3aWR0aCA9IC4wMiwgaGVpZ2h0ID0gMCwgc2l6ZSA9IDEuNiwgYWxwaGEgPSAuNikgKyAKICAgIGZhY2V0X3dyYXAofiBwYXN0ZShrLCAiIGdydXBvcyIpKSArIAogICAgc2NhbGVfY29sb3JfYnJld2VyKHBhbGV0dGUgPSAiRGFyazIiKQoKYGBgCgpgYGB7cn0Ka19lc2NvbGhpZG8gPSAzCgphdHJpYnVpY29lcyAlPiUgCiAgICBmaWx0ZXIoayA9PSBrX2VzY29saGlkbykgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gcmVvcmRlcihsYWJlbCwgYXZhbGlhY2FvKSwgeSA9IGF2YWxpYWNhbywgY29sb3VyID0gZ3J1cG8pKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDIsIGhlaWdodCA9IDAsIHNpemUgPSAzLCBhbHBoYSA9IC42KSArIAogICAgZmFjZXRfd3JhcCh+IHBhc3RlKGssICIgZ3J1cG9zIikpICsgCiAgICBzY2FsZV9jb2xvcl9icmV3ZXIocGFsZXR0ZSA9ICJEYXJrMiIpICsgCiAgICBsYWJzKHggPSAiIiwgeSA9ICJBdmFsaWHDp8OjbyBSVCIpICsgCiAgICBjb29yZF9mbGlwKCkgCmBgYAoKIyMgQ29tIGR1YXMgZGltZW5zw7VlcwoKYGBge3J9CmFncnVwYW1lbnRvX2hfMmQgPSBmaWxtZXMgJT4lIAogICAgbXV0YXRlKGJpbGhldGVyaWEgPSBsb2cxMChiaWxoZXRlcmlhKSkgJT4lCiAgICBtdXRhdGVfYXQodmFycyhhdmFsaWFjYW8sIGJpbGhldGVyaWEpLCBmdW5zKHNjYWxlKSkgJT4lCiAgICBjb2x1bW5fdG9fcm93bmFtZXMoImZpbG1lIikgJT4lCiAgICBzZWxlY3QoYXZhbGlhY2FvLCBiaWxoZXRlcmlhKSAlPiUKICAgIGRpc3QobWV0aG9kID0gImV1Y2xpZGVhbiIpICU+JSAKICAgIGhjbHVzdChtZXRob2QgPSAid2FyZC5EIikgICAgCgpnZ2RlbmRyb2dyYW0oYWdydXBhbWVudG9faF8yZCwgcm90YXRlID0gVFJVRSwgdGhlbWVfZGVuZHJvID0gRikKYGBgCgpgYGB7cn0KYWdydXBhbWVudG9faF8yZCA9IGZpbG1lcyAlPiUKICAgbXV0YXRlKGJpbGhldGVyaWEgPSBsb2cxMChiaWxoZXRlcmlhKSkgJT4lCiAgIG11dGF0ZV9hdCh2YXJzKCJhdmFsaWFjYW8iLCAiYmlsaGV0ZXJpYSIpLCBmdW5zKHNjYWxlKSkgJT4lCiAgIGNvbHVtbl90b19yb3duYW1lcygiZmlsbWUiKSAlPiUKICAgc2VsZWN0KCJhdmFsaWFjYW8iLCAiYmlsaGV0ZXJpYSIpICU+JQogICBkaXN0KG1ldGhvZCA9ICJldWNsaWRlYW4iKSAlPiUKICAgaGNsdXN0KG1ldGhvZCA9ICJjZW50cm9pZCIpCgpnZ2RlbmRyb2dyYW0oYWdydXBhbWVudG9faF8yZCwgcm90YXRlID0gVFJVRSwgdGhlbWVfZGVuZHJvID0gRikKYGBgCgpgYGB7cn0KZmlsbWVzMiA9IGZpbG1lcyAlPiUgbXV0YXRlKGJpbGhldGVyaWEgPSBsb2cxMChiaWxoZXRlcmlhKSkKcGxvdGFfaGNsdXN0c18yZChhZ3J1cGFtZW50b19oXzJkLAogICAgICAgICAgICAgICAgZmlsbWVzMiwKICAgICAgICAgICAgICAgIGMoImF2YWxpYWNhbyIsICJiaWxoZXRlcmlhIiksCiAgICAgICAgICAgICAgICBsaW5rYWdlX21ldGhvZCA9ICJjZW50cm9p
ZCIsIGtzID0gMTo2KSArCiAgIHNjYWxlX3lfbG9nMTAoKSArCiAgIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIkRhcmsyIikKYGBg